We will clean the environment, setup the locations, define colors, and create a datestamp.
Clean the environment.
Set locations and working directories…
Create a new analysis directory...
[1] FALSE
[1] FALSE
[1] FALSE
[1] FALSE
[1] FALSE
[1] FALSE
[1] "/Users/swvanderlaan/PLINK/analyses/consortia/CHARGE_1000G_CAC"
[1] "_archived" "1. CHARGE_1000G_CAC.nb.html" "1. CHARGE_1000G_CAC.Rmd"
[4] "2. bulkRNAseq.nb.html" "2. bulkRNAseq.Rmd" "20220128.CAC.RegionalAssociationPlots.RData"
[7] "20220129.CAC.Parsing_GWASSumStats.RData" "20220131.CAC.PolarMorphism.RData" "20220201.CAC.PolarMorphism.RData"
[10] "3. scRNAseq.nb.html" "3. scRNAseq.Rmd" "4. Parsing_GWASSumStats.nb.html"
[13] "4. Parsing_GWASSumStats.Rmd" "5. RegionalAssociationPlots.nb.html" "5. RegionalAssociationPlots.Rmd"
[16] "6. PolarMorphism.nb.html" "6. PolarMorphism.Rmd" "6. PolarMorphismbeta.nb.html"
[19] "6. PolarMorphismbeta.Rmd" "CAC" "CHARGE_1000G_CAC.Rproj"
[22] "CredibleSets" "images" "LICENSE"
[25] "PolarMorphism" "RACER" "README.html"
[28] "README.md" "README.orig.md" "renv"
[31] "renv.lock" "scripts" "SNP"
[34] "targets"
… a package-installation function …
source(paste0(PROJECT_loc, "/scripts/functions.R"))… and load those packages.
install.packages.auto("readr")
install.packages.auto("optparse")
install.packages.auto("tools")
install.packages.auto("dplyr")
install.packages.auto("tidyr")
install.packages.auto("naniar")
# To get 'data.table' with 'fwrite' to be able to directly write gzipped-files
# Ref: https://stackoverflow.com/questions/42788401/is-possible-to-use-fwrite-from-data-table-with-gzfile
# install.packages("data.table", repos = "https://Rdatatable.gitlab.io/data.table")
library(data.table)
install.packages.auto("tidyverse")
install.packages.auto("DT")
install.packages.auto("knitr")
install.packages.auto("eeptools")
install.packages.auto("haven")
install.packages.auto("tableone")
install.packages.auto("BlandAltmanLeh")
# Install the devtools package from Hadley Wickham
install.packages.auto('devtools')
library(devtools)
# for plotting
install.packages.auto("pheatmap")
install.packages.auto("forestplot")
install.packages.auto("ggplot2")
install.packages.auto("ggpubr")
install.packages.auto("ggrepel")
install.packages.auto("UpSetR")
devtools::install_github("thomasp85/patchwork")Using github PAT from envvar GITHUB_PAT
Skipping install of 'patchwork' from a github remote, the SHA1 (79223d30) has not changed since last install.
Use `force = TRUE` to force installation
# For regional association plots
install_github("oliviasabik/RACER") Using github PAT from envvar GITHUB_PAT
Skipping install of 'RACER' from a github remote, the SHA1 (1394c9d4) has not changed since last install.
Use `force = TRUE` to force installation
# Install ggrepel package if needed
# install.packages.auto(ggrepel)
library(ggrepel)
install.packages.auto(qvalue)Loading required package: qvalue
We will create a datestamp and define the Utrecht Science Park Colour Scheme.
Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
### UtrechtScienceParkColoursScheme
###
### WebsitetoconvertHEXtoRGB:http://hex.colorrrs.com.
### Forsomefunctionsyoushoulddividethesenumbersby255.
###
### No. Color HEX (RGB) CHR MAF/INFO
###---------------------------------------------------------------------------------------
### 1 yellow #FBB820 (251,184,32) => 1 or 1.0>INFO
### 2 gold #F59D10 (245,157,16) => 2
### 3 salmon #E55738 (229,87,56) => 3 or 0.05<MAF<0.2 or 0.4<INFO<0.6
### 4 darkpink #DB003F ((219,0,63) => 4
### 5 lightpink #E35493 (227,84,147) => 5 or 0.8<INFO<1.0
### 6 pink #D5267B (213,38,123) => 6
### 7 hardpink #CC0071 (204,0,113) => 7
### 8 lightpurple #A8448A (168,68,138) => 8
### 9 purple #9A3480 (154,52,128) => 9
### 10 lavendel #8D5B9A (141,91,154) => 10
### 11 bluepurple #705296 (112,82,150) => 11
### 12 purpleblue #686AA9 (104,106,169) => 12
### 13 lightpurpleblue #6173AD (97,115,173/101,120,180) => 13
### 14 seablue #4C81BF (76,129,191) => 14
### 15 skyblue #2F8BC9 (47,139,201) => 15
### 16 azurblue #1290D9 (18,144,217) => 16 or 0.01<MAF<0.05 or 0.2<INFO<0.4
### 17 lightazurblue #1396D8 (19,150,216) => 17
### 18 greenblue #15A6C1 (21,166,193) => 18
### 19 seaweedgreen #5EB17F (94,177,127) => 19
### 20 yellowgreen #86B833 (134,184,51) => 20
### 21 lightmossgreen #C5D220 (197,210,32) => 21
### 22 mossgreen #9FC228 (159,194,40) => 22 or MAF>0.20 or 0.6<INFO<0.8
### 23 lightgreen #78B113 (120,177,19) => 23/X
### 24 green #49A01D (73,160,29) => 24/Y
### 25 grey #595A5C (89,90,92) => 25/XY or MAF<0.01 or 0.0<INFO<0.2
### 26 lightgrey #A2A3A4 (162,163,164) => 26/MT
###
### ADDITIONAL COLORS
### 27 midgrey #D7D8D7
### 28 verylightgrey #ECECEC"
### 29 white #FFFFFF
### 30 black #000000
###----------------------------------------------------------------------------------------------
uithof_color = c("#FBB820","#F59D10","#E55738","#DB003F","#E35493","#D5267B",
"#CC0071","#A8448A","#9A3480","#8D5B9A","#705296","#686AA9",
"#6173AD","#4C81BF","#2F8BC9","#1290D9","#1396D8","#15A6C1",
"#5EB17F","#86B833","#C5D220","#9FC228","#78B113","#49A01D",
"#595A5C","#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000")
uithof_color_legend = c("#FBB820", "#F59D10", "#E55738", "#DB003F", "#E35493",
"#D5267B", "#CC0071", "#A8448A", "#9A3480", "#8D5B9A",
"#705296", "#686AA9", "#6173AD", "#4C81BF", "#2F8BC9",
"#1290D9", "#1396D8", "#15A6C1", "#5EB17F", "#86B833",
"#C5D220", "#9FC228", "#78B113", "#49A01D", "#595A5C",
"#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000")
### ----------------------------------------------------------------------------We will apply PolarMorphism to compare the CAC GWAS vs cIMT, CAD, and ischemic stroke subtypes.
library(devtools)
install_github("UMCUgenetics/PolarMorphism")Using github PAT from envvar GITHUB_PAT
Downloading GitHub repo UMCUgenetics/PolarMorphism@HEAD
checking for file ‘/private/var/folders/cj/1vxt4xb11m1cx7wn020f8hww0000gn/T/RtmpQGmRRV/remotes1cc0284d0cfc/UMCUGenetics-PolarMorphism-1d1bfd7/DESCRIPTION’ ...
✓ checking for file ‘/private/var/folders/cj/1vxt4xb11m1cx7wn020f8hww0000gn/T/RtmpQGmRRV/remotes1cc0284d0cfc/UMCUGenetics-PolarMorphism-1d1bfd7/DESCRIPTION’ (342ms)
─ preparing ‘PolarMorphism’:
✓ checking DESCRIPTION meta-information
─ excluding invalid files
Subdirectory 'R' contains invalid file names:
‘kappas.4foldtransform.Rda’
─ checking for LF line-endings in source and make files and shell scripts
─ checking for empty or unneeded directories
Omitted ‘LazyData’ from DESCRIPTION
NB: this package now depends on R (>= 3.5.0)
WARNING: Added dependency on R >= 3.5.0 because serialized objects in
serialize/load version 3 cannot be read in older versions of R.
File(s) containing such objects:
‘PolarMorphism/R/sysdata.rda’
─ building ‘PolarMorphism_0.0.0.9000.tar.gz’
Installing package into ‘/Users/swvanderlaan/Library/R/x86_64/4.1/library’
(as ‘lib’ is unspecified)
* installing *source* package ‘PolarMorphism’ ...
** using staged installation
** R
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (PolarMorphism)
library(PolarMorphism)We need to load and parse the data first.
We used gwas2cojo to parse the datasets with 1000G phase 3 as a reference. This resulted in cleaned and harmonized GWAS datasets with standardized headings, and containing rsIDs that are also present in 1000G. For some GWAS studies, missing beta or se were recovered from z-scores or missing chr:bp positions/frequencies from rsids using the dbSNP153 COMMON database. Prior to conversion, non-biallelic SNPs were removed from the genomic reference. Therefore, some triallelic SNPs may not be present.
Phenotype Short Source File PMID
Coronary calcification CAC
Coronary artery disease CAD UKBB.GWAS1KG.EXOME.CAD.SOFT.META.PublicRelease.300517.txt.gz 28714975
Any stroke AS MEGASTROKE.1.AS.EUR.GC_filtered_X_nocases_het.txt.gz 29531354
Any ischemic stroke IS MEGASTROKE.2.IS.EUR.GC_filtered_X_nocases_het.txt.gz 29531354
Large artery stroke LAS MEGASTROKE.3.LAS.EUR.GC_filtered_X_nocases_het.txt.gz 29531354
Cardio-embolic stroke CES MEGASTROKE.4.CE.EUR.GC_filtered_X_nocases_het.txt.gz 29531354
Small vessel disease SVD MEGASTROKE.5.SVD.EUR.GC_filtered_X_nocases_het.txt.gz 29531354
Carotid IMT cIMT IMT.EA.META.MAF1.HetDF4_jun.csv.gz 30586722
Plaque presence Plaque Plaque_meta_032218.csv.gz 30586722
COJO_loc = paste0(GWAS_loc, "/_cojo/rsid")
# traits <- c("CAD", "AS", "IS", "LAS", "CES", "SVD", "cIMT", "Plaque")
traits <- c("CAD")
for(trait in traits){
cat(paste0("\nProcessing data for [", trait, "] (",paste0(COJO_loc, "/", trait, ".cojo.gz"),").\n"))
tbl <- as_tibble(fread(paste0(COJO_loc, "/", trait, ".cojo.gz"),
verbose = FALSE,
showProgress = TRUE,
nThread = 4))
# the column names are "SNP" "A1" "A2" "freq" "b" "se" "p" "n"
# we have to change them so PolarMorphism knows what each column contains
colnames(tbl) <- c("snpid","a1","a2","freq","beta","se","pval", "n") # note that PolarMorphism does not need or use the "n" column
assign(trait, tbl)
}
Processing data for [CAD] (/Users/swvanderlaan//PLINK/_GWAS_Datasets/_cojo/rsid/CAD.cojo.gz).
|--------------------------------------------------|
|==================================================|
rm(tbl)cat("\nGWAS: coronary artery calcification, CAC:\n")
GWAS: coronary artery calcification, CAC:
gwas_sumstats <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_complete.rds"))
head(gwas_sumstats)nrow(gwas_sumstats)[1] 8586047
library(tidyverse)
CACtib <- as_tibble(gwas_sumstats %>%
select(rsID, Chr, Position, Allele1, Allele2, Freq1, Effect, SE, Pvalue, N) %>% # end of select
mutate(Allele1 = toupper(Allele1)) %>% # change all characters to uppercase
mutate(Allele2 = toupper(Allele2)) %>%
rename( # rename variables
BP = Position, # new vs old
snpid = rsID,
a1 = Allele1,
a2 = Allele2,
freq = Freq1,
beta = Effect,
se = SE,
pval = Pvalue,
n = N
) %>% # end of rename
relocate(snpid, Chr, BP, a1, a2, freq, beta, se, pval, n) # set the order of columns
)
head(CACtib)str(CACtib)tibble [8,586,047 × 10] (S3: tbl_df/tbl/data.frame)
$ snpid: chr [1:8586047] "rs61769339" "rs61769350" "." "rs189800799" ...
$ Chr : int [1:8586047] 1 1 1 1 1 1 1 1 1 1 ...
$ BP : int [1:8586047] 662622 693731 697411 701835 706368 712930 713131 714019 714596 715265 ...
$ a1 : chr [1:8586047] "A" "A" "D" "T" ...
$ a2 : chr [1:8586047] "G" "G" "I" "C" ...
$ freq : num [1:8586047] 0.114 0.871 0.969 0.969 0.644 ...
$ beta : num [1:8586047] 0.1104 -0.0377 0.0305 -0.1051 0.1408 ...
$ se : num [1:8586047] 0.0791 0.0691 0.196 0.1573 0.0862 ...
$ pval : num [1:8586047] 0.163 0.585 0.876 0.504 0.102 ...
$ n : int [1:8586047] 8357 8781 4445 5878 3074 6709 6709 5654 7998 7998 ...
- attr(*, ".internal.selfref")=<externalptr>
nrow(CACtib)[1] 8586047
rm(gwas_sumstats)Let’s save this work for easy access.
saveRDS(CACtib, file = paste0(OUT_loc, "/", Today, ".CAC.rds"))
saveRDS(CAD, file = paste0(OUT_loc, "/", Today, ".CAD.rds"))
# rm(CACtib, CAD)
#
# saveRDS(CIMT, file = paste0(OUT_loc, "/", Today, ".CIMT.rds"))
# saveRDS(PLAQ, file = paste0(OUT_loc, "/", Today, ".PLAQ.rds"))
#
# rm(CIMT, PLAQ)
#
# saveRDS(AS, file = paste0(OUT_loc, "/", Today, ".AS.rds"))
# saveRDS(IS, file = paste0(OUT_loc, "/", Today, ".IS.rds"))
# saveRDS(LAS, file = paste0(OUT_loc, "/", Today, ".LAS.rds"))
# saveRDS(CE, file = paste0(OUT_loc, "/", Today, ".CE.rds"))
# saveRDS(SVD, file = paste0(OUT_loc, "/", Today, ".SVD.rds"))
#
# rm(AS, IS, LAS, CE, SVD)
CACtib <- readRDS(file = paste0(OUT_loc, "/20220131.CAC.rds"))
CADtib <- readRDS(file = paste0(OUT_loc, "/20220131.CAD.rds"))
# CIMTtib <- readRDS(file = paste0(OUT_loc, "/20220131.CIMT.rds"))
# PLAQtib <- readRDS(file = paste0(OUT_loc, "/20220131.PLAQ.rds"))
#
# AStib <- readRDS(file = paste0(OUT_loc, "/20220131.AS.rds"))
# IStib <- readRDS(file = paste0(OUT_loc, "/20220131.IS.rds"))
# LAStib <- readRDS(file = paste0(OUT_loc, "/20220131.LAS.rds"))
# CEtib <- readRDS(file = paste0(OUT_loc, "/20220131.CE.rds"))
# SVDtib <- readRDS(file = paste0(OUT_loc, "/20220131.SVD.rds"))We need to choose one of the GWAS as reference to make sure all GWASs have the same reference and alternative allele for each SNP. We will make CAC the reference, and ‘flip’ the alleles of all the GWASs so they align with CAC.
There are some variants without ‘rsID’, we are going to remove these.
CACtibf <- CACtib %>%
filter(snpid!='.') %>% # end of rename
relocate(snpid, beta, se, a1, a2, freq, pval, Chr, BP, n) # set the order of columns
rm(CACtib)We will make the list of SNPs for alignment.
variants_of_interest <- CACtibf$snpid[grepl("rs", CACtibf$snpid)]Because the function Alleleflip not only flips the alleles, but also adds a z-score column, we have to manually do that for CAC.
CACtibf$z <- CACtibf$beta/CACtibf$seCADtib <- PolarMorphism::AlleleFlip(sumstats = CAD, snps = CACtibf %>% select(snpid, a1, a2), snpid = "snpid", only.a2 = F)
rm(CAD)Now we are converting GWAS to polar-coordinates.
CAC_CADtibf <- PolarMorphism::ConvertToPolar(dfnames = c("CACtibf", "CADtib"), # not clear how this should be given
snpid = "snpid",
whiten = TRUE,
covsnps = variants_of_interest, # is this required?
mahalanobis.threshold = 5,
whitening.method = "ZCA-cor",
LDcorrect = FALSE, # we don't need that as we de-correlate through whitening
ld.path = paste0(PROJECT_loc,"/eur_w_ld_chr/")) [1] "number of SNPs used for cov: 6988432"
head(CAC_CADtibf)str(CAC_CADtibf)tibble [6,993,034 × 19] (S3: tbl_df/tbl/data.frame)
$ snpid : chr [1:6993034] "rs143225517" "rs2073813" "rs3131969" "rs3131968" ...
$ Chr : int [1:6993034] 1 1 1 1 1 1 1 1 1 1 ...
$ BP : int [1:6993034] 751756 753541 754182 754192 754334 755890 756604 757640 757734 757936 ...
$ beta.1 : num [1:6993034] -0.0001 0.0036 0.0002 0.0005 0.0058 0.0032 0.0029 -0.0167 -0.0069 -0.0072 ...
$ beta.2 : num [1:6993034] -0.00546 0.00184 0.00361 0.00373 0.005 0.00441 0.00162 -0.00343 -0.00684 -0.00728 ...
$ se.1 : num [1:6993034] 0.0291 0.0309 0.0301 0.0302 0.0304 0.0288 0.0288 0.0328 0.0284 0.0285 ...
$ se.2 : num [1:6993034] 0.0142 0.0138 0.0137 0.0137 0.0138 ...
$ pval.1 : num [1:6993034] 0.998 0.906 0.996 0.987 0.85 ...
$ pval.2 : num [1:6993034] 0.7 0.894 0.792 0.785 0.718 ...
$ a1.1 : chr [1:6993034] "T" "A" "A" "A" ...
$ a1.2 : chr [1:6993034] "T" "A" "A" "A" ...
$ a1.b : chr [1:6993034] "C" "A" "G" "G" ...
$ a2.1 : chr [1:6993034] "C" "G" "G" "G" ...
$ a2.2 : chr [1:6993034] "C" "G" "G" "G" ...
$ a2.b : chr [1:6993034] "T" "G" "A" "A" ...
$ r : num [1:6993034] 0.372 0.162 0.254 0.262 0.379 ...
$ angle : num [1:6993034] -0.1456 -2.8408 -0.081 -0.0606 -1.8259 ...
$ z.whitened.1: num [1:6993034] 0.01355 0.10594 -0.00515 0.00397 0.16721 ...
$ z.whitened.2: num [1:6993034] -0.372 0.123 0.254 0.262 0.341 ...
- attr(*, ".internal.selfref")=<externalptr>
temp <- CAC_CADtibf %>%
filter(r > 4)
# set the limits
max_z_1 <- max(temp$z.whitened.1)
max_z_2 <- max(temp$z.whitened.2)
lim <- round(
ifelse(max_z_2 < max_z_1, max_z_1, max_z_2),
digits = 0)
rm(max_z_1, max_z_2)gwas_z <- abs(qnorm(5e-8))
ggpubr::ggscatter(temp,
x = "z.whitened.1",
y = "z.whitened.2",
color = uithof_color[16],
xlim = c(-lim,lim),
ylim = c(-lim,lim),
xlab = "CAC (z, whitened)",
ylab = "CAD (z, whitened)") +
geom_hline(yintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_hline(yintercept = -gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = -gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
theme(aspect.ratio = 1) +
theme_minimal()rm(temp)We calculated the central angle between observed and expected SNP position under the null-hypothesis of trait-specific effects, and the radius (r) indicating the size of the effect.
Now we calculate the p-value for the radius, r.
library(qvalue)
# p-value & q-value for r
CAC_CADtibf$r.pval <- PolarMorphism::PvalueForR(r = CAC_CADtibf$r,
p = 2)
CAC_CADtibf$r.qval <- qvalue(p = CAC_CADtibf$r.pval)$qvaluesSecond, we calculate the the Von Mises distribution p-value for the angle, theta, but only for the subset of variants where the false-discovery rate q-value is significant, q < 0.05.
# filter on r, for p-value & q-value for theta
PolarMorphies <- CAC_CADtibf[CAC_CADtibf$r.qval < 0.05,]
PolarMorphies$theta.pval <- PolarMorphism::PvalueForAngle(angle.trans = PolarMorphies$angle,
r = PolarMorphies$r,
# tol = 1e-20,
kappa.file = paste0(PROJECT_loc, "/PolarMorphism/kappas.4foldtransform.Rda"),
debug = FALSE)
# some theta.pval are smaller than 0
PolarMorphies$theta.pval[PolarMorphies$theta.pval < 0] <- 0
# min(PolarMorphies$theta.pval)
PolarMorphies$theta.qval <- qvalue(p = PolarMorphies$theta.pval)$qvalues
PolarMorphies <- PolarMorphies %>%
mutate(ThetaQ = case_when(theta.qval < 0.05 & pval.1 > 5e-8 & pval.2 > 5e-8~ 'novel',
theta.qval < 0.05 & pval.1 < 5e-8 & pval.2 > 5e-8~ 'sign. GWAS CAC',
theta.qval < 0.05 & pval.1 > 5e-8 & pval.2 < 5e-8~ 'sign. GWAS CAD',
TRUE ~ 'not significant'))# filter on theta
PolarMorphies %>%
ggplot(aes(x = abs(angle), y = r, color = theta.qval < 0.05)) +
geom_point()
PolarMorphies %>%
ggplot(aes(x = abs(z.whitened.1), y = abs(z.whitened.2), color = theta.qval < 0.05)) +
theme(aspect.ratio = 1) +
xlim(0,lim) +
ylim(0,lim) +
geom_point()Warning: Removed 2 rows containing missing values (geom_point).
PolarMorphies$abs.z.whitened.1 <- abs(PolarMorphies$z.whitened.1)
PolarMorphies$abs.z.whitened.2 <- abs(PolarMorphies$z.whitened.2)
ggpubr::ggscatter(PolarMorphies,
x = "abs.z.whitened.1",
y = "abs.z.whitened.2",
color = "ThetaQ", palette = "npg",
xlim = c(0,lim),
ylim = c(0,lim),
xlab = "CAC\n(z, whitened, abs)",
ylab = "CAD\n(z, whitened, abs)") +
geom_hline(yintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
theme(aspect.ratio = 1) +
theme_minimal()NA
NA
NAPolarMorphies
# fix so that ieugwasr::ld_clump will work
PolarMorphies <- rename(PolarMorphies, rsid = snpid)
PolarMorphies <- rename(PolarMorphies, pval = theta.qval) # we want to clump on the theta.qval
PolarMorphiesLDclump <- ieugwasr::ld_clump(
dat = PolarMorphies,
clump_kb = 10000,
clump_r2 = 0.001,
clump_p = 1,
pop = "EUR",
access_token = NULL
# access_token = NULL,
# bfile = NULL,
# plink_bin = NULL
)Please look at vignettes for options on running this locally if you need to run many instances of this command.
Clumping KssNjI, 8636 variants, using EUR population reference
Removing 8450 of 8636 variants due to LD with other variants or absence from LD reference panel
# fix so that PolarMorphies are correct again
PolarMorphies <- rename(PolarMorphies, snpid = rsid)
PolarMorphies <- rename(PolarMorphies, theta.qval = pval)
PolarMorphiesLDclump <- rename(PolarMorphiesLDclump, snpid = rsid)
PolarMorphiesLDclump <- rename(PolarMorphiesLDclump, theta.qval = pval)
PolarMorphiesLDclumpNA
# Reference: https://bioinformatics.stackexchange.com/questions/2503/how-to-get-a-list-of-genes-corresponding-to-the-list-of-snps-rs-ids/2504
require(biomaRt)
# To show which marts are available
listMarts()Ensembl site unresponsive, trying useast mirror
# You need the SNP mart
mart <- useMart("ENSEMBL_MART_SNP")
# Find homo sapiens
listDatasets(mart)Ensembl site unresponsive, trying uswest mirror
# This will be the dataset we want to use
dataset <- useDataset("hsapiens_snp", mart = mart)
# Show available filters
listFilters(dataset)
# Now list all available attributes
listAttributes(dataset)
# You need the SNP mart from homo sapiens
ensembl <- useMart("ENSEMBL_MART_SNP", dataset = "hsapiens_snp")# To get the ensembl gene id belonging to the SNPs
Annotations <- getBM(attributes = c("refsnp_id", "chr_name", "chrom_start", "chrom_end",
"allele", "mapweight", "validated", "allele_1", "minor_allele",
"minor_allele_freq", "minor_allele_count", "clinical_significance",
"synonym_name", "refsnp_source", "ensembl_gene_stable_id", "ensembl_gene_name"),
filters = "snp_filter", values = PolarMorphiesLDclump$snpid,
mart = ensembl,
uniqueRows = TRUE)
AnnotationsNAfwrite(PolarMorphiesLDclump, file = paste0(OUT_loc, "/PolarMorphiesLDclump.txt"), sep = "\t")proxySearch_CAC_CAD <- fread(file = paste0(PROJECT_loc, "/PolarMorphism/proxySearch_CAC_CAD/proxySearch.results.csv"))
novel_snps <- PolarMorphiesLDclump$snpid[grepl("novel", PolarMorphiesLDclump$ThetaQ)]
library(ggpubr)
p1 <- ggpubr::ggscatter(PolarMorphiesLDclump,
x = "abs.z.whitened.1",
y = "abs.z.whitened.2",
color = "ThetaQ", palette = "npg",
xlim = c(0,lim),
ylim = c(0,lim),
xlab = "CAC\n(z, whitened, abs)",
ylab = "CAD\n(z, whitened, abs)",
title = "CAC vs CAD",
label = "snpid",
label.select = novel_snps,
repel = TRUE, show.legend.text = FALSE) +
geom_hline(yintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
theme(aspect.ratio = 1) +
theme_minimal() + theme(legend.position = "bottom")
p2 <- ggpubr::ggscatter(PolarMorphiesLDclump,
x = "abs.z.whitened.1",
y = "abs.z.whitened.2",
color = "ThetaQ", palette = "npg",
xlim = c(0,lim/3),
ylim = c(0,lim/3),
xlab = "CAC\n(z, whitened, abs)",
ylab = "CAD\n(z, whitened, abs)",
title = "CAC vs CAD (zoomed)",
label = "snpid",
label.select = novel_snps,
repel = TRUE, show.legend.text = FALSE) +
geom_hline(yintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
theme(aspect.ratio = 1) +
theme_minimal() + theme(legend.position = "bottom")
p1Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(filename = paste0(PLOT_loc, "/", Today, ".PolarMorphies.LDclump.CAC.CAD.pdf"), plot = p1)Saving 7.29 x 4.51 in image
Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(filename = paste0(PLOT_loc, "/", Today, ".PolarMorphies.LDclump.CAC.CAD.png"), plot = p1)Saving 7.29 x 4.51 in image
Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider increasing max.overlaps
p2Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(filename = paste0(PLOT_loc, "/", Today, ".PolarMorphies.LDclump.CAC.CAD.zoom.pdf"), plot = p2)Saving 7.29 x 4.51 in image
Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(filename = paste0(PLOT_loc, "/", Today, ".PolarMorphies.LDclump.CAC.CAD.zoom.png"), plot = p2)Saving 7.29 x 4.51 in image
Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider increasing max.overlaps
install.packages.auto(plotly)
library(plotly)
p3 <- ggpubr::ggscatter(PolarMorphiesLDclump,
x = "abs.z.whitened.1",
y = "abs.z.whitened.2",
color = "ThetaQ",
palette = "npg",
xlim = c(0,lim/3),
ylim = c(0,lim/3),
xlab = "CAC\n(z, whitened, abs)",
ylab = "CAD\n(z, whitened, abs)",
title = "CAC vs CAD (zoomed)") +
geom_hline(yintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
geom_vline(xintercept = gwas_z, linetype = 2,
color = uithof_color[3], size = 0.5) +
theme(aspect.ratio = 1) +
theme_minimal() + theme(legend.position = "bottom")
ggplotly(p3,
source = "select",
tooltip = c("all"))NA
NAlibrary(plotly)
# https://plotly.com/r/horizontal-vertical-shapes/
vline <- function(x = 0, color = uithof_color[3]) {
list(
type = "line",
y0 = 0,
y1 = 1,
yref = "paper",
x0 = x,
x1 = x,
line = list(color = color, dash="dot")
)
}
hline <- function(y = 0, color = uithof_color[3]) {
list(
type = "line",
x0 = 0,
x1 = 1,
xref = "paper",
y0 = y,
y1 = y,
line = list(color = color, dash="dot")
)
}
# https://plotly.com/r/hover-text-and-formatting/
fig <- PolarMorphiesLDclump %>%
plot_ly(
type = "scatter",
mode = 'markers',
x = ~abs.z.whitened.1,
y = ~abs.z.whitened.2,
marker = list(size = ~r, sizeref = 0.8),
color = ~ThetaQ, colors = uithof_color,
text = ~snpid,
hovertemplate = paste(
"<b>%{text}</b><br><br>",
"%{yaxis.title.text}: %{y} (abs., whitened)<br>",
"%{xaxis.title.text}: %{x} (abs., whitened)<br>",
"<extra></extra>"
)
) %>%
layout(title = 'PolarMorphism: CAC vs CAD',
plot_bgcolor = "white",
xaxis = list(title = "CAC"),
yaxis = list(title = "CAD"),
legend = list(title=list(text='<b>Pleiotropic effect </b>')),
shapes = list(vline(gwas_z), hline(gwas_z) #,
# list(type = "rect",
# fillcolor = uithof_color[28], line = list(color = uithof_color[27]),
# opacity = 0.2,
# y0 = nom_z_y, y1 = gwas_z, x0 = nom_z_x, x1 = gwas_z)
# )
)
)
fig <- fig %>%
layout(legend = list(orientation = 'h', y = -0.3))
figNAVersion: v1.0.0
Last update: 2022-02-03
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to create plot regional association plots.
Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan
Changes log
* v1.0.0 Initial version.
sessionInfo()R version 4.1.2 (2021-11-01)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Monterey 12.2
Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] grid tools stats graphics grDevices utils datasets methods base
other attached packages:
[1] plotly_4.10.0 patchwork_1.1.0.9000 biomaRt_2.50.2 remotes_2.4.2 qvalue_2.26.0
[6] PolarMorphism_0.0.0.9000 UpSetR_1.4.0 ggrepel_0.9.1 ggpubr_0.4.0 forestplot_2.0.1
[11] checkmate_2.0.0 magrittr_2.0.1 pheatmap_1.0.12 devtools_2.4.3 usethis_2.1.5
[16] BlandAltmanLeh_0.3.1 tableone_0.13.0 haven_2.4.3 eeptools_1.2.4 DT_0.20
[21] knitr_1.37 forcats_0.5.1 stringr_1.4.0 purrr_0.3.4 tibble_3.1.6
[26] ggplot2_3.3.5 tidyverse_1.3.1 data.table_1.14.2 naniar_0.6.1 tidyr_1.1.4
[31] dplyr_1.0.7 optparse_1.7.1 readr_2.1.1
loaded via a namespace (and not attached):
[1] utf8_1.2.2 R.utils_2.11.0 tidyselect_1.1.1 lme4_1.1-27.1 RSQLite_2.2.9 AnnotationDbi_1.56.2
[7] htmlwidgets_1.5.4 maptools_1.1-2 munsell_0.5.0 ragg_1.2.1 withr_2.4.3 colorspace_2.0-2
[13] Biobase_2.54.0 filelock_1.0.2 rstudioapi_0.13 stats4_4.1.2 ggsignif_0.6.3 vcd_1.4-9
[19] labeling_0.4.2 GenomeInfoDbData_1.2.7 bit64_4.0.5 farver_2.1.0 rprojroot_2.0.2 coda_0.19-4
[25] vctrs_0.3.8 generics_0.1.1 xfun_0.29 BiocFileCache_2.2.0 R6_2.5.1 GenomeInfoDb_1.30.0
[31] arm_1.12-2 bitops_1.0-7 cachem_1.0.6 assertthat_0.2.1 scales_1.1.1 gtable_0.3.0
[37] processx_3.5.2 rlang_0.4.12 systemfonts_1.0.3 splines_4.1.2 lazyeval_0.2.2 rstatix_0.7.0
[43] broom_0.7.11 BiocManager_1.30.16 yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 modelr_0.1.8
[49] crosstalk_1.2.0 backports_1.4.1 rsconnect_0.8.25 ellipsis_0.3.2 jquerylib_0.1.4 RColorBrewer_1.1-2
[55] BiocGenerics_0.40.0 sessioninfo_1.2.2 Rcpp_1.0.7 plyr_1.8.6 progress_1.2.2 zlibbioc_1.40.0
[61] RCurl_1.98-1.5 ps_1.6.0 prettyunits_1.1.1 S4Vectors_0.32.3 zoo_1.8-9 fs_1.5.2
[67] survey_4.1-1 whitening_1.3.0 RACER_1.0.0 lmtest_0.9-39 reprex_2.0.1 pkgload_1.2.4
[73] hms_1.1.1 evaluate_0.14 XML_3.99-0.8 readxl_1.3.1 IRanges_2.28.0 gridExtra_2.3
[79] testthat_3.1.1 compiler_4.1.2 crayon_1.4.2 minqa_1.2.4 R.oo_1.24.0 htmltools_0.5.2
[85] corpcor_1.6.10 tzdb_0.2.0 visdat_0.5.3 lubridate_1.8.0 DBI_1.1.2 dbplyr_2.1.1
[91] MASS_7.3-54 rappdirs_0.3.3 boot_1.3-28 Matrix_1.4-0 getopt_1.20.3 car_3.0-12
[97] cli_3.1.0 mitools_2.4 R.methodsS3_1.8.1 ieugwasr_0.1.5 pkgconfig_2.0.3 foreign_0.8-81
[103] sp_1.4-6 xml2_1.3.3 bslib_0.3.1 XVector_0.34.0 rvest_1.0.2 callr_3.7.0
[109] digest_0.6.29 Biostrings_2.62.0 rmarkdown_2.11 cellranger_1.1.0 curl_4.3.2 nloptr_1.2.2.3
[115] lifecycle_1.0.1 nlme_3.1-153 jsonlite_1.7.2 carData_3.0-5 viridisLite_0.4.0 desc_1.4.0
[121] fansi_1.0.0 pillar_1.6.4 ggsci_2.9 lattice_0.20-45 KEGGREST_1.34.0 fastmap_1.1.0
[127] httr_1.4.2 pkgbuild_1.3.1 survival_3.2-13 glue_1.6.0 png_0.1-7 pander_0.6.4
[133] bit_4.0.4 stringi_1.7.6 sass_0.4.0 blob_1.2.2 textshaping_0.3.6 memoise_2.0.1
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".PolarMorphism.RData"))| © 1979-2022 Sander W. van der Laan | s.w.vanderlaan[at]gmail.com | swvanderlaan.github.io. |